###########################################################################
##### Why Onset Matters: Warfare, Severity, and Duration in Civil War #####
###########################################################################


######## Require packages
require(readr)
require(tidyverse)
require(nnet)
require(stargazer)
require(survminer)
require(survival)
require(ggpubr)
require(gridExtra)
require(pscl)



######## Import data
datonset <- read_csv("DatOnset.csv")
datonset <- subset(datonset, select = -...1)

## Don't use exponential
options(scipen = 999)





###################### Table 1 ######################

## Get descriptive statistics for onset types pre-1991
pre1991 <- datonset %>%
  subset(startyear>1943) %>%
  subset(startyear<1991)
# Get statistics
table(pre1991$onsettype)

## Get descriptive statistics for onset types post-1991
post1991 <- datonset %>%
  subset(startyear>1990)
# Get statistics
table(post1991$onsettype)

## Get descriptive statistics for onset types for 1944-2020
table(datonset$onsettype)





###################### Table 2 ######################

## Get descriptive statistics for warfare pre-1991
# Warfare
table(pre1991$warfare)
# Warfare2
table(pre1991$warfare2)

## Get descriptive statistics for warfare post-1991
table(post1991$warfare)
# Warfare2
table(post1991$warfare2)

## Get descriptive statistics for onset types for both periods
# Warfare
table(datonset$warfare)
# Warfare2
table(datonset$warfare2)





###################### Table 3 ######################

## Get descriptive statistics for onset intensity
table(datonset$onsetint2)

## Get descriptive statistics for conflict intensity (60%)
table(datonset$intensity60)

## Get descriptive statistics for conflict intensity (75%)
table(datonset$intensity75)





###################### Table 4 ######################

# Change class onsettype variable
datonset$onsettype = as.factor(datonset$onsettype)

# Change the baseline category of onset variable
datonset$onsettype_new <- factor(datonset$onsettype, levels = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"))

# Change the baseline category of warfare variable
datonset$warfare_new <- factor(datonset$warfare, levels = c("Irregular", "Conventional", "SNC"))

##### Model 1 (bivariate)
warfare1m = multinom(warfare_new ~ onsettype_new, 
                     data = datonset, 
                     model = TRUE)
# Get the number of observations
nrow(fitted(warfare1m))

##### Model 2 (Limited controls)
warfare2m = multinom(warfare_new ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac, 
                     data = datonset, 
                     model = TRUE)
# Get the number of observations
nrow(fitted(warfare2m))

##### Model 3 (Additional identity and goals controls)
warfare3m = multinom(warfare_new ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac
                     + marxist + ethnowar + secessionist + islam, 
                     data = datonset, 
                     model = TRUE)
# Get the number of observations
nrow(fitted(warfare3m))

##### Model 4 (Additional structural controls)
warfare4m = multinom(warfare_new ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac
                     + fl_oil + milper, 
                     data = datonset, 
                     model = TRUE)
# Get the number of observations
nrow(fitted(warfare4m))

##### Model 5 (Full controls)
warfare5m = multinom(warfare_new ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                       fl_oil + milper + marxist + ethnowar + secessionist + islam, 
                     data = datonset, 
                     model = TRUE)
# Get the number of observations
nrow(fitted(warfare5m))

# Function to compute McFadden's pseudo-R^2
compute_pseudo_r2 <- function(model) {
  # Log-likelihood of the fitted model
  logLik_fitted <- logLik(model)
  
  # Fit the null model (intercept-only model)
  null_model <- multinom(warfare_new ~ 1, data = datonset)
  
  # Log-likelihood of the null model
  logLik_null <- logLik(null_model)
  
  # Compute McFadden's R-squared
  R2_McFadden <- 1 - (as.numeric(logLik_fitted) / as.numeric(logLik_null))
  return(R2_McFadden)
}

# Compute McFadden's pseudo-R^2 for each model
R2_warfare1m <- compute_pseudo_r2(warfare1m)
R2_warfare2m <- compute_pseudo_r2(warfare2m)
R2_warfare3m <- compute_pseudo_r2(warfare3m)
R2_warfare4m <- compute_pseudo_r2(warfare4m)
R2_warfare5m <- compute_pseudo_r2(warfare5m)

# Return values
cat("McFadden's R-squared for Model 1:", R2_warfare1m, "\n")
cat("McFadden's R-squared for Model 2:", R2_warfare2m, "\n")
cat("McFadden's R-squared for Model 3:", R2_warfare3m, "\n")
cat("McFadden's R-squared for Model 4:", R2_warfare4m, "\n")
cat("McFadden's R-squared for Model 5:", R2_warfare5m, "\n")

## Summary table
stargazer(warfare1m, warfare2m, warfare3m, warfare4m, warfare5m, type = "text")





################## Table A2 - NEEDED FOR FIGURE 1 ##################

### Logit model for warfare - Table A2 (appendix) ###

# Create numeric version of warfare2 variable
datonset <- datonset %>%
  mutate(warfare2b = ifelse(warfare2 == "Symmetric", 1, 0))

##### Model 1 (bivariate)
warfare1 = glm(warfare2b ~ onsettype_new, 
               data = datonset, 
               family = binomial(link = "logit"))

##### Model 2 (Limited controls)
warfare2 = glm(warfare2b ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac, 
               data = datonset, 
               family = binomial(link = "logit"))

##### Model 3 (Additional identity and goals controls)
warfare3 = glm(warfare2b ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac
               + marxist + ethnowar + secessionist + islam, 
               data = datonset, 
               family = binomial(link = "logit"))

##### Model 4 (Additional structural controls)
warfare4 = glm(warfare2b ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac
               + fl_oil + milper, 
               data = datonset, 
               family = binomial(link = "logit"))

##### Model 5 (Full controls)
warfare5 = glm(warfare2b ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                 fl_oil + milper + marxist + ethnowar + secessionist + islam, 
               data = datonset, 
               family = binomial(link = "logit"))

## Get all the models in one table
stargazer(warfare1, warfare2, warfare3, warfare4, warfare5, type = "text")

# Get pseudo R-squared for each model
pR2(warfare1)["McFadden"]
pR2(warfare2)["McFadden"]
pR2(warfare3)["McFadden"]
pR2(warfare4)["McFadden"]
pR2(warfare5)["McFadden"]





###################### Figure 1 ######################

### Plot predicted probabilities
# Create new data frame with assigned values for all controls
newdatawarfare <- with(datonset, data.frame(onsettype_new = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                                            lngdppc = mean(lngdppc, na.rm = TRUE),
                                            post_cw = median(post_cw),
                                            mountain2 = mean(mountain2, na.rm = TRUE),
                                            polity = mean(polity, na.rm = TRUE),
                                            ethfrac = mean(ethfrac, na.rm = TRUE),
                                            fl_oil = median(fl_oil, na.rm = TRUE),
                                            milper = mean(milper, na.rm = TRUE),
                                            marxist = median(marxist),
                                            ethnowar = median(ethnowar),
                                            secessionist = median(secessionist),
                                            islam = median(islam)))

# Calculate predicted probabilities for each type of onset with standard errors
predprod_warfare <- predict(warfare5, newdatawarfare, type = "response", se.fit = TRUE)
predprod_warfare$pred.prob <- predprod_warfare$fit

# Calculate the bonds on the CIs for each pred. prob.
predprod_warfare$lowerCI <- predprod_warfare$fit - (1.96*predprod_warfare$se.fit)
predprod_warfare$upperCI <- predprod_warfare$fit + (1.96*predprod_warfare$se.fit)

# Convert list into dataframe
predprod_warfare <- as.data.frame(predprod_warfare)

# Add onset type variable
predprod_warfare$onsettype_new <- c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests")

# Plot the predicted probabilities with their respective CIs
ggplot(predprod_warfare, aes(x = onsettype_new, y = pred.prob)) +
  geom_pointrange(aes(ymin = lowerCI, ymax = upperCI, fill = onsettype_new), color = "darkslategrey", alpha = 0.8) +
  xlab("") +
  ylab("Predicted probabilities") +
  ylim(0,1.25) +
  scale_y_continuous(breaks = seq(0, 1.2, 0.2)) +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "black"),
        axis.text.y = element_text(colour = "black"),
        text = element_text(size=18)) +
  scale_x_discrete(limits = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                   labels = c("Peripheral challenge" = "Peripheral\n challenge",
                              "State disintegration" = "State\n disintegration", 
                              "Coup" = "Coup",
                              "Insurrectionary protests" = "Insurrectionary\n protests"))





###################### Table 5 ######################

##### 60 percent threshold #####
##### Model 1 (bivariate)
conint1b = glm(intensity60 ~ onsettype_new,
              data = datonset, 
              family = binomial(link = "logit"))

##### Model 2 (Limited controls)
conint2b = glm(intensity60 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac, 
              data = datonset, 
              family = binomial(link = "logit"))

##### Model 3 (Additional identity and goals controls)
conint3b = glm(intensity60 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                + marxist + secessionist + islam, 
              data = datonset, family = binomial(link = "logit"))

##### Model 4 (Additional structural controls)
conint4b = glm(intensity60 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                milper + lnpop + fl_oil, 
              data = datonset, family = binomial(link = "logit"))

##### Model 5 (Full controls)
conint5b = glm(intensity60 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                milper + lnpop + fl_oil + marxist + secessionist + islam,
              data = datonset, family = binomial(link = "logit"))

## Get all the models in one table
stargazer(conint1b, conint2b, conint3b, conint4b, conint5b, type = "text")

# Get pseudo R-squared for each model
pR2(conint1b)["McFadden"]
pR2(conint2b)["McFadden"]
pR2(conint3b)["McFadden"]
pR2(conint4b)["McFadden"]
pR2(conint5b)["McFadden"]





###################### Figure 2 ######################

# Create new data frame with assigned values for all controls
newdataconseverity60 <- with(datonset, data.frame(onsettype_new = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                                                  lngdppc = mean(lngdppc, na.rm = TRUE),
                                                  post_cw = median(post_cw),
                                                  mountain2 = mean(mountain2, na.rm = TRUE),
                                                  polity = mean(polity, na.rm = TRUE),
                                                  ethfrac = mean(ethfrac, na.rm = TRUE),
                                                  milper = mean(milper, na.rm = TRUE),
                                                  lnpop = mean(lnpop, na.rm = TRUE),
                                                  fl_oil = median(fl_oil, na.rm = TRUE),
                                                  marxist = median(marxist),
                                                  secessionist = median(secessionist),
                                                  islam = median(islam)))

# Calculate predicted probabilities for each type of onset with standard errors
predprod_conseverity60 <- predict(conint5b, newdataconseverity60, type = "response", se.fit = TRUE)
predprod_conseverity60$pred.prob <- predprod_conseverity60$fit

# Convert list into dataframe
predprod_conseverity60 <- as.data.frame(predprod_conseverity60)

# Calculate the bonds on the CIs for each pred. prob.
predprod_conseverity60$lowerCI <- predprod_conseverity60$fit - (1.96*predprod_conseverity60$se.fit) # 1.645 for 90% CI
predprod_conseverity60$upperCI <- predprod_conseverity60$fit + (1.96*predprod_conseverity60$se.fit)

# Add onset type variable
predprod_conseverity60$onsettype_new <- c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests")

## Plot the predicted probabilities with their respective CIs
ggplot(predprod_conseverity60, aes(x = onsettype_new, y = pred.prob)) +
  geom_pointrange(aes(ymin = lowerCI, ymax = upperCI, fill = onsettype_new), color = "darkslategrey", alpha = 0.8) +
  xlab("") +
  ylab("Predicted probabilities") +
  ylim(0,1.25) +
  scale_y_continuous(breaks = seq(0, 1.2, 0.2)) +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "black"),
        axis.text.y = element_text(colour = "black"),
        text = element_text(size=18)) +
  scale_x_discrete(limits = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                   labels = c("Peripheral challenge" = "Peripheral\n challenge",
                              "State disintegration" = "State\n disintegration", 
                              "Coup" = "Coup",
                              "Insurrectionary protests" = "Insurrectionary\n protests"))





###################### Figure 3 ######################

# Drop case of Northern Ireland from the analysis because clear outlier.
datonset2 <- subset(datonset, conflict_id != 178)

## Estimate Kaplan-Meier curves to see the survival rate in years
KM_1 <- survfit(Surv(duration_years,ended) ~ 1, data = datonset2)

# Plot the survival function
# All civil wars
dur_gen <- ggsurvplot(KM_1,
                      data = datonset2,
                      palette = "slategray4",
                      ggtheme = theme_bw(),
                      font.x = 20,
                      font.y = 20,
                      font.legend = 20,
                      legend.title = "",
                      legend.labs = "All civil wars")

# By types of onset
KM_2 <- survfit(Surv(duration_years, ended) ~ onsettype, data = datonset2)
dur_type <- ggsurvplot(KM_2, 
                       data = datonset2,
                       legend.title = "",
                       legend.labs = c("Coup", "Insurrectionary protests", "Peripheral challenge", "State disintegration"),
                       palette = c("darkslategrey", "indianred3", "azure3", "bisque3"),
                       ggtheme = theme_bw(),
                       font.x = 20,
                       font.y = 20,
                       font.legend = 20)

# Both graphs in one figure
dur_final <- ggarrange(plotlist = list(dur_gen$plot, dur_type$plot),
                       ncol = 1, nrow = 2)
dur_final

# Test whether the survival functions are statistically different for these types of conflict
# Store duration data
surv_years <- Surv(time = datonset2$duration_years, event = datonset2$ended)

# Check stat significance
logrank = survdiff(surv_years ~ onsettype, data = datonset2)
logrank # p = 0.000002





###################### Table 6 ######################
##### Model 1 (bivariate)
onset_cox <- coxph(surv_years ~ onsettype_new, data = datonset2)

##### Model 2 (Limited controls)
onset_cox2 <- coxph(surv_years ~ onsettype_new + lngdppc + milper + post_cw + mountain2 + marxist + ethfrac, 
                    data = datonset2)

##### Model 3 (Additional identity and goals controls)
onset_cox3 <- coxph(surv_years ~ onsettype_new + lngdppc + milper + post_cw + mountain2 + marxist + ethfrac + secessionist + islam, 
                    data = datonset2)

##### Model 4 (Additional structural controls)
onset_cox4 <- coxph(surv_years ~ onsettype_new + lngdppc + milper + post_cw + mountain2 + polity + fl_oil + marxist + ethfrac, 
                    data = datonset2)

##### Model 5 (Full controls)
onset_cox5 <- coxph(surv_years ~ onsettype_new + lngdppc + milper + post_cw + mountain2 + polity + fl_oil + marxist + ethfrac + secessionist + islam, 
                    data = datonset2)

### Summary of results
stargazer(onset_cox, onset_cox2, onset_cox3, onset_cox4, onset_cox5, type = "text")





############################## Appendix ##############################

################## Table A1 ################## 

summary(datonset$lngdppc)
sd(datonset$lngdppc, na.rm = TRUE)

summary(datonset$fl_oil)
sd(datonset$fl_oil, na.rm = TRUE)

summary(datonset$postcw)
sd(datonset$postcw, na.rm = TRUE)

summary(datonset$polity)
sd(datonset$polity, na.rm = TRUE)

summary(datonset$ethnowar)
sd(datonset$ethnowar, na.rm = TRUE)

summary(datonset$milper)
sd(datonset$milper, na.rm = TRUE)

summary(datonset$mountain2)
sd(datonset$mountain2, na.rm = TRUE)

summary(datonset$marxist)
sd(datonset$marxist, na.rm = TRUE)

summary(datonset$secessionist)
sd(datonset$secessionist, na.rm = TRUE)

summary(datonset$islam)
sd(datonset$islam, na.rm = TRUE)

summary(datonset$ethfrac)
sd(datonset$ethfrac, na.rm = TRUE)

datonset$relfrac <- as.numeric(datonset$relfrac)
summary(datonset$relfrac)
sd(datonset$relfrac, na.rm = TRUE)

summary(datonset$lnpop)
sd(datonset$lnpop, na.rm = TRUE)





################## Table A2 ################## 

# See script for main text.





################## Table A3 ################## 

### Warfare multinomial 5-year lag GDP ###

##### Model 1 (bivariate) #####
lag5_1 = multinom(warfare_new ~ onsettype_new, 
                  data = datonset, 
                  model = TRUE)
# Get the number of observations
nrow(fitted(lag5_1))

##### Model 2 (Limited controls) #####
lag5_2 = multinom(warfare_new ~ onsettype_new + lngdppc_lag5 + post_cw + mountain2 + polity + ethfrac, 
                  data = datonset, 
                  model = TRUE)
# Get the number of observations
nrow(fitted(lag5_2))

##### Model 3 (Additional identity and goals controls) #####
lag5_3 = multinom(warfare_new ~ onsettype_new + lngdppc_lag5 + post_cw + mountain2 + polity + ethfrac
                  + marxist + ethnowar + secessionist + islam, 
                  data = datonset, 
                  model = TRUE)
# Get the number of observations
nrow(fitted(lag5_3))

##### Model 4 (Additional structural controls) #####
lag5_4 = multinom(warfare_new ~ onsettype_new + lngdppc_lag5 + post_cw + mountain2 + polity + ethfrac
                  + fl_oil + milper, 
                  data = datonset, 
                  model = TRUE)
# Get the number of observations
nrow(fitted(lag5_4))

##### Model 5 (Full controls) #####
lag5_5 = multinom(warfare_new ~ onsettype_new + lngdppc_lag5 + post_cw + mountain2 + polity + ethfrac +
                    fl_oil + milper + marxist + ethnowar + secessionist + islam, 
                  data = datonset, 
                  model = TRUE)
# Get the number of observations
nrow(fitted(lag5_5))

## Get all the models in one table
stargazer(lag5_1, lag5_2, lag5_3, lag5_4, lag5_5, type = "text")

### See script for main text to create function to calculate pseudo R^2

# Compute McFadden's pseudo-R^2 for each model
R2_lag5_1 <- compute_pseudo_r2(lag5_1)
R2_lag5_2 <- compute_pseudo_r2(lag5_2)
R2_lag5_3 <- compute_pseudo_r2(lag5_3)
R2_lag5_4 <- compute_pseudo_r2(lag5_4)
R2_lag5_5 <- compute_pseudo_r2(lag5_5)

# Return values
cat("McFadden's R-squared for Model 1:", R2_lag5_1, "\n")
cat("McFadden's R-squared for Model 2:", R2_lag5_2, "\n")
cat("McFadden's R-squared for Model 3:", R2_lag5_3, "\n")
cat("McFadden's R-squared for Model 4:", R2_lag5_4, "\n")
cat("McFadden's R-squared for Model 5:", R2_lag5_5, "\n")





################## Table A4 ################## 

### Warfare logit 5-year lag GDP ###

# Create numeric version of warfare2 variable
datonset <- datonset %>%
  mutate(warfare2b = ifelse(warfare2 == "Symmetric", 1, 0))

##### Model 1 (bivariate) #####
lag5_1_l = glm(warfare2b ~ onsettype_new, 
               data = datonset, 
               family = binomial(link = "logit"))

##### Model 2 (Limited controls) #####
lag5_2_l = glm(warfare2b ~ onsettype_new + lngdppc_lag5 + post_cw + mountain2 + polity + ethfrac, 
               data = datonset, 
               family = binomial(link = "logit"))

##### Model 3 (Additional identity and goals controls) #####
lag5_3_l = glm(warfare2b ~ onsettype_new + lngdppc_lag5 + post_cw + mountain2 + polity + ethfrac
               + marxist + ethnowar + secessionist + islam, 
               data = datonset, 
               family = binomial(link = "logit"))

##### Model 4 (Additional structural controls) #####
lag5_4_l = glm(warfare2b ~ onsettype_new + lngdppc_lag5 + post_cw + mountain2 + polity + ethfrac
               + fl_oil + milper, 
               data = datonset, 
               family = binomial(link = "logit"))

##### Model 5 (Full controls) #####
lag5_5_l = glm(warfare2b ~ onsettype_new + lngdppc_lag5 + post_cw + mountain2 + polity + ethfrac +
                 fl_oil + milper + marxist + ethnowar + secessionist + islam, 
               data = datonset, 
               family = binomial(link = "logit"))

## Get all the models in one table
stargazer(lag5_1_l, lag5_2_l, lag5_3_l, lag5_4_l, lag5_5_l, type = "text")

# Get pseudo R-squared for each model
pR2(lag5_1_l)["McFadden"]
pR2(lag5_2_l)["McFadden"]
pR2(lag5_3_l)["McFadden"]
pR2(lag5_4_l)["McFadden"]
pR2(lag5_5_l)["McFadden"]





###################### Figure A1 ######################

### Plot predicted probabilities (GDP lag 5 years)

# Create new data frame with assigned values for all controls
newdatawarfare_lag5 <- with(datonset, data.frame(onsettype_new = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                                                 lngdppc_lag5 = mean(lngdppc_lag5, na.rm = TRUE),
                                                 post_cw = median(post_cw),
                                                 mountain2 = mean(mountain2, na.rm = TRUE),
                                                 polity = mean(polity, na.rm = TRUE),
                                                 ethfrac = mean(ethfrac, na.rm = TRUE),
                                                 fl_oil = median(fl_oil, na.rm = TRUE),
                                                 milper = mean(milper, na.rm = TRUE),
                                                 marxist = median(marxist),
                                                 ethnowar = median(ethnowar),
                                                 secessionist = median(secessionist),
                                                 islam = median(islam)))

# Calculate predicted probabilities for each type of onset with standard errors
predprod_warfare_lag5 <- predict(lag5_5_l, newdatawarfare_lag5, type = "response", se.fit = TRUE)
predprod_warfare_lag5$pred.prob <- predprod_warfare_lag5$fit

# Calculate the bonds on the CIs for each pred. prob.
predprod_warfare_lag5$lowerCI <- predprod_warfare_lag5$fit - (1.96*predprod_warfare_lag5$se.fit)
predprod_warfare_lag5$upperCI <- predprod_warfare_lag5$fit + (1.96*predprod_warfare_lag5$se.fit)

# Convert list into dataframe
predprod_warfare_lag5 <- as.data.frame(predprod_warfare_lag5)

# Add onset type variable
predprod_warfare_lag5$onsettype_new <- c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests")

# Plot the predicted probabilities with their respective CIs
ggplot(predprod_warfare_lag5, aes(x = onsettype_new, y = pred.prob)) +
  geom_pointrange(aes(ymin = lowerCI, ymax = upperCI, fill = onsettype_new), color = "darkslategrey", alpha = 0.8) +
  xlab("") +
  ylab("Predicted probabilities") +
  ylim(0,1.25) +
  scale_y_continuous(breaks = seq(0, 1.2, 0.2)) +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "black"),
        axis.text.y = element_text(colour = "black"),
        text = element_text(size=18)) +
  scale_x_discrete(limits = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                   labels = c("Peripheral challenge" = "Peripheral\n challenge",
                              "State disintegration" = "State\n disintegration", 
                              "Coup" = "Coup",
                              "Insurrectionary protests" = "Insurrectionary\n protests"))





################## Table A5 ################## 

### Warfare multinomial natural resources ###

##### Model 1 (bivariate) #####
war_1 = multinom(warfare_new ~ onsettype_new, 
                 data = datonset, 
                 model = TRUE)
# Get the number of observations
nrow(fitted(war_1))

##### Model 2 (Limited controls) #####
war_2 = multinom(warfare_new ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac, 
                 data = datonset, 
                 model = TRUE)
# Get the number of observations
nrow(fitted(war_2))

##### Model 3 (Additional identity and goals controls) #####
war_3 = multinom(warfare_new ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac
                 + marxist + ethnowar + secessionist + islam, 
                 data = datonset, 
                 model = TRUE)
# Get the number of observations
nrow(fitted(war_3))

##### Model 4 (Additional structural controls) #####
war_4 = multinom(warfare_new ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac
                 + natres + milper, 
                 data = datonset, 
                 model = TRUE)
# Get the number of observations
nrow(fitted(war_4))

##### Model 5 (Full controls) #####
war_5 = multinom(warfare_new ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                   natres + milper + marxist + ethnowar + secessionist + islam, 
                 data = datonset, 
                 model = TRUE)
# Get the number of observations
nrow(fitted(war_5))

## Get all the models in one table
stargazer(war_1, war_2, war_3, war_4, war_5, type = "text")

### See script for main text to create function to calculate pseudo R^2

# Compute McFadden's pseudo-R^2 for each model
R2_war_1 <- compute_pseudo_r2(war_1)
R2_war_2 <- compute_pseudo_r2(war_2)
R2_war_3 <- compute_pseudo_r2(war_3)
R2_war_4 <- compute_pseudo_r2(war_4)
R2_war_5 <- compute_pseudo_r2(war_5)

# Return values
cat("McFadden's R-squared for Model 1:", R2_war_1, "\n")
cat("McFadden's R-squared for Model 2:", R2_war_2, "\n")
cat("McFadden's R-squared for Model 3:", R2_war_3, "\n")
cat("McFadden's R-squared for Model 4:", R2_war_4, "\n")
cat("McFadden's R-squared for Model 5:", R2_war_5, "\n")





################## Table A6 ################## 

### Logit regression on intensity (75% threshold) ###

##### Model 1 (bivariate)
conint1 = glm(intensity75 ~ onsettype_new,
              data = datonset, 
              family = binomial(link = "logit"))

##### Model 2 (Limited controls)
conint2 = glm(intensity75 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac, 
              data = datonset, 
              family = binomial(link = "logit"))

##### Model 3 (Additional identity and goals controls)
conint3 = glm(intensity75 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                + marxist + secessionist + islam, 
              data = datonset, family = binomial(link = "logit"))

##### Model 4 (Additional structural controls)
conint4 = glm(intensity75 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                milper + lnpop + fl_oil, 
              data = datonset, family = binomial(link = "logit"))

##### Model 5 (Full controls)
conint5 = glm(intensity75 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                milper + lnpop + fl_oil + marxist + secessionist + islam,
              data = datonset, family = binomial(link = "logit"))

## Get all the models in one table
stargazer(conint1, conint2, conint3, conint4, conint5, type = "text")

# Get pseudo R-squared for each model
pR2(conint1)["McFadden"]
pR2(conint2)["McFadden"]
pR2(conint3)["McFadden"]
pR2(conint4)["McFadden"]
pR2(conint5)["McFadden"]





###################### Figure A2 ######################

# Create new data frame with assigned values for all controls
newdataconseverity75 <- with(datonset, data.frame(onsettype_new = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                                                  lngdppc = mean(lngdppc, na.rm = TRUE),
                                                  post_cw = median(post_cw),
                                                  mountain2 = mean(mountain2, na.rm = TRUE),
                                                  polity = mean(polity, na.rm = TRUE),
                                                  ethfrac = mean(ethfrac, na.rm = TRUE),
                                                  milper = mean(milper, na.rm = TRUE),
                                                  lnpop = mean(lnpop, na.rm = TRUE),
                                                  fl_oil = median(fl_oil, na.rm = TRUE),
                                                  marxist = median(marxist),
                                                  secessionist = median(secessionist),
                                                  islam = median(islam)))

# Calculate predicted probabilities for each type of onset with standard errors
predprod_conseverity75 <- predict(conint5, newdataconseverity75, type = "response", se.fit = TRUE)
predprod_conseverity75$pred.prob <- predprod_conseverity75$fit

# Convert list into dataframe
predprod_conseverity75 <- as.data.frame(predprod_conseverity75)

# Calculate the bonds on the CIs for each pred. prob.
predprod_conseverity75$lowerCI <- predprod_conseverity75$fit - (1.96*predprod_conseverity75$se.fit)
predprod_conseverity75$upperCI <- predprod_conseverity75$fit + (1.96*predprod_conseverity75$se.fit)

# Add onset type variable
predprod_conseverity75$onsettype_new <- c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests")

## Plot the predicted probabilities with their respective CIs
ggplot(predprod_conseverity75, aes(x = onsettype_new, y = pred.prob)) +
  geom_pointrange(aes(ymin = lowerCI, ymax = upperCI, fill = onsettype_new), color = "darkslategrey", alpha = 0.8) +
  xlab("") +
  ylab("Predicted probabilities") +
  ylim(0,1.25) +
  scale_y_continuous(breaks = seq(0, 1.2, 0.2)) +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "black"),
        axis.text.y = element_text(colour = "black"),
        text = element_text(size=18)) +
  scale_x_discrete(limits = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                   labels = c("Peripheral challenge" = "Peripheral\n challenge",
                              "State disintegration" = "State\n disintegration", 
                              "Coup" = "Coup",
                              "Insurrectionary protests" = "Insurrectionary\n protests"))





###################### Table A7 ######################

##### Model 1 (bivariate)
onsetint1b = glm(onsetint ~ onsettype_new,
                 data = datonset, 
                 family = binomial(link = "logit"))

##### Model 2 (Limited controls)
onsetint2b = glm(onsetint ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac, 
                 data = datonset, 
                 family = binomial(link = "logit"))

##### Model 3 (Additional identity and goals controls)
onsetint3b = glm(onsetint ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                   + marxist + secessionist + islam, 
                 data = datonset, family = binomial(link = "logit"))

##### Model 4 (Additional structural controls)
onsetint4b = glm(onsetint ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                   milper + lnpop + fl_oil, 
                 data = datonset, family = binomial(link = "logit"))

##### Model 5 (Full controls)
onsetint5b = glm(onsetint ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                   milper + lnpop + fl_oil + marxist + secessionist + islam,
                 data = datonset, family = binomial(link = "logit"))

## Get all the models in one table
stargazer(onsetint1b, onsetint2b, onsetint3b, onsetint4b, onsetint5b, type = "text")

# Get pseudo R-squared for each model
pR2(onsetint1b)["McFadden"]
pR2(onsetint2b)["McFadden"]
pR2(onsetint3b)["McFadden"]
pR2(onsetint4b)["McFadden"]
pR2(onsetint5b)["McFadden"]





###################### Figure A3 ######################

# Create new data frame with assigned values for all controls
newdataonsetint1 <- with(datonset, data.frame(onsettype_new = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                                              lngdppc = mean(lngdppc, na.rm = TRUE),
                                              post_cw = median(post_cw),
                                              mountain2 = mean(mountain2, na.rm = TRUE),
                                              polity = mean(polity, na.rm = TRUE),
                                              ethfrac = mean(ethfrac, na.rm = TRUE),
                                              milper = mean(milper, na.rm = TRUE),
                                              lnpop = mean(lnpop, na.rm = TRUE),
                                              fl_oil = median(fl_oil, na.rm = TRUE),
                                              marxist = median(marxist),
                                              secessionist = median(secessionist),
                                              islam = median(islam)))

# Calculate predicted probabilities for each type of onset with standard errors
predprod_onsetint <- predict(onsetint5b, newdataonsetint1, type = "response", se.fit = TRUE)
predprod_onsetint$pred.prob <- predprod_onsetint$fit

# Convert list into dataframe
predprod_onsetint <- as.data.frame(predprod_onsetint)

# Calculate the bonds on the CIs for each pred. prob.
predprod_onsetint$lowerCI <- predprod_onsetint$fit - (1.96*predprod_onsetint$se.fit)
predprod_onsetint$upperCI <- predprod_onsetint$fit + (1.96*predprod_onsetint$se.fit)

# Add onset type variable
predprod_onsetint$onsettype_new <- c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests")

## Plot the predicted probabilities with their respective CIs
ggplot(predprod_onsetint, aes(x = onsettype_new, y = pred.prob)) +
  geom_pointrange(aes(ymin = lowerCI, ymax = upperCI, fill = onsettype_new), color = "darkslategrey", alpha = 0.8) +
  xlab("") +
  ylab("Predicted probabilities") +
  ylim(0,1.25) +
  scale_y_continuous(breaks = seq(0, 1.2, 0.2)) +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "black"),
        axis.text.y = element_text(colour = "black"),
        text = element_text(size=18)) +
  scale_x_discrete(limits = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                   labels = c("Peripheral challenge" = "Peripheral\n challenge",
                              "State disintegration" = "State\n disintegration", 
                              "Coup" = "Coup",
                              "Insurrectionary protests" = "Insurrectionary\n protests"))





###################### Table A8 ######################

##### Model 1 (bivariate)
onsetint1b2 = glm(onsetint2 ~ onsettype_new,
                  data = datonset, 
                  family = binomial(link = "logit"))

##### Model 2 (Limited controls)
onsetint2b2 = glm(onsetint2 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac, 
                  data = datonset, 
                  family = binomial(link = "logit"))

##### Model 3 (Additional identity and goals controls)
onsetint3b2 = glm(onsetint2 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                    + marxist + secessionist + islam, 
                  data = datonset, family = binomial(link = "logit"))

##### Model 4 (Additional structural controls)
onsetint4b2 = glm(onsetint2 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                    milper + lnpop + fl_oil, 
                  data = datonset, family = binomial(link = "logit"))

##### Model 5 (Full controls)
onsetint5b2 = glm(onsetint2 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                    milper + lnpop + fl_oil + marxist + secessionist + islam,
                  data = datonset, family = binomial(link = "logit"))

## Get all the models in one table
stargazer(onsetint1b2, onsetint2b2, onsetint3b2, onsetint4b2, onsetint5b2, type = "text")

# Get pseudo R-squared for each model
pR2(onsetint1b2)["McFadden"]
pR2(onsetint2b2)["McFadden"]
pR2(onsetint3b2)["McFadden"]
pR2(onsetint4b2)["McFadden"]
pR2(onsetint5b2)["McFadden"]





###################### Figure A4 ######################

# Create new data frame with assigned values for all controls
newdataonsetint2 <- with(datonset, data.frame(onsettype_new = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                                              lngdppc = mean(lngdppc, na.rm = TRUE),
                                              post_cw = median(post_cw),
                                              mountain2 = mean(mountain2, na.rm = TRUE),
                                              polity = mean(polity, na.rm = TRUE),
                                              ethfrac = mean(ethfrac, na.rm = TRUE),
                                              milper = mean(milper, na.rm = TRUE),
                                              lnpop = mean(lnpop, na.rm = TRUE),
                                              fl_oil = median(fl_oil, na.rm = TRUE),
                                              marxist = median(marxist),
                                              secessionist = median(secessionist),
                                              islam = median(islam)))

# Calculate predicted probabilities for each type of onset with standard errors
predprod_onsetint2 <- predict(onsetint5b2, newdataonsetint2, type = "response", se.fit = TRUE)
predprod_onsetint2$pred.prob <- predprod_onsetint2$fit

# Convert list into dataframe
predprod_onsetint2 <- as.data.frame(predprod_onsetint2)

# Calculate the bonds on the CIs for each pred. prob.
predprod_onsetint2$lowerCI <- predprod_onsetint2$fit - (1.96*predprod_onsetint2$se.fit)
predprod_onsetint2$upperCI <- predprod_onsetint2$fit + (1.96*predprod_onsetint2$se.fit)

# Add onset type variable
predprod_onsetint2$onsettype_new <- c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests")

## Plot the predicted probabilities with their respective CIs
ggplot(predprod_onsetint2, aes(x = onsettype_new, y = pred.prob)) +
  geom_pointrange(aes(ymin = lowerCI, ymax = upperCI, fill = onsettype_new), color = "darkslategrey", alpha = 0.8) +
  xlab("") +
  ylab("Predicted probabilities") +
  ylim(0,1.25) +
  scale_y_continuous(breaks = seq(0, 1.2, 0.2)) +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "black"),
        axis.text.y = element_text(colour = "black"),
        text = element_text(size=18)) +
  scale_x_discrete(limits = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                   labels = c("Peripheral challenge" = "Peripheral\n challenge",
                              "State disintegration" = "State\n disintegration", 
                              "Coup" = "Coup",
                              "Insurrectionary protests" = "Insurrectionary\n protests"))





###################### Table A9 ######################
##### 60 percent threshold #####
##### Model 1 (bivariate)
int1_l5 = glm(intensity60 ~ onsettype_new,
              data = datonset, 
              family = binomial(link = "logit"))

##### Model 2 (Limited controls)
int2_l5 = glm(intensity60 ~ onsettype_new + lngdppc_lag5 + post_cw + mountain2 + polity + ethfrac, 
              data = datonset, 
              family = binomial(link = "logit"))

##### Model 3 (Additional identity and goals controls)
int3_l5 = glm(intensity60 ~ onsettype_new + lngdppc_lag5 + post_cw + mountain2 + polity + ethfrac +
                + marxist + secessionist + islam, 
              data = datonset, family = binomial(link = "logit"))

##### Model 4 (Additional structural controls)
int4_l5 = glm(intensity60 ~ onsettype_new + lngdppc_lag5 + post_cw + mountain2 + polity + ethfrac +
                milper + lnpop + fl_oil, 
              data = datonset, family = binomial(link = "logit"))

##### Model 5 (Full controls)
int5_l5 = glm(intensity60 ~ onsettype_new + lngdppc_lag5 + post_cw + mountain2 + polity + ethfrac +
                milper + lnpop + fl_oil + marxist + secessionist + islam,
              data = datonset, family = binomial(link = "logit"))

## Get all the models in one table
stargazer(int1_l5, int2_l5, int3_l5, int4_l5, int5_l5, type = "text")

# Get pseudo R-squared for each model
pR2(int1_l5)["McFadden"]
pR2(int2_l5)["McFadden"]
pR2(int3_l5)["McFadden"]
pR2(int4_l5)["McFadden"]
pR2(int5_l5)["McFadden"]





###################### Figure A5 ######################

# Create new data frame with assigned values for all controls
newdataconseverity60_l5 <- with(datonset, data.frame(onsettype_new = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                                                     lngdppc_lag5 = mean(lngdppc_lag5, na.rm = TRUE),
                                                     post_cw = median(post_cw),
                                                     mountain2 = mean(mountain2, na.rm = TRUE),
                                                     polity = mean(polity, na.rm = TRUE),
                                                     ethfrac = mean(ethfrac, na.rm = TRUE),
                                                     milper = mean(milper, na.rm = TRUE),
                                                     lnpop = mean(lnpop, na.rm = TRUE),
                                                     fl_oil = median(fl_oil, na.rm = TRUE),
                                                     marxist = median(marxist),
                                                     secessionist = median(secessionist),
                                                     islam = median(islam)))

# Calculate predicted probabilities for each type of onset with standard errors
predprod_conseverity60_l5 <- predict(int5_l5, newdataconseverity60_l5, type = "response", se.fit = TRUE)
predprod_conseverity60_l5$pred.prob <- predprod_conseverity60_l5$fit

# Convert list into dataframe
predprod_conseverity60_l5 <- as.data.frame(predprod_conseverity60_l5)

# Calculate the bonds on the CIs for each pred. prob.
predprod_conseverity60_l5$lowerCI <- predprod_conseverity60_l5$fit - (1.96*predprod_conseverity60_l5$se.fit)
predprod_conseverity60_l5$upperCI <- predprod_conseverity60_l5$fit + (1.96*predprod_conseverity60_l5$se.fit)

# Add onset type variable
predprod_conseverity60_l5$onsettype_new <- c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests")

## Plot the predicted probabilities with their respective CIs
ggplot(predprod_conseverity60_l5, aes(x = onsettype_new, y = pred.prob)) +
  geom_pointrange(aes(ymin = lowerCI, ymax = upperCI, fill = onsettype_new), color = "darkslategrey", alpha = 0.8) +
  xlab("") +
  ylab("Predicted probabilities") +
  ylim(0,1.25) +
  scale_y_continuous(breaks = seq(0, 1.2, 0.2)) +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "black"),
        axis.text.y = element_text(colour = "black"),
        text = element_text(size=18)) +
  scale_x_discrete(limits = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                   labels = c("Peripheral challenge" = "Peripheral\n challenge",
                              "State disintegration" = "State\n disintegration", 
                              "Coup" = "Coup",
                              "Insurrectionary protests" = "Insurrectionary\n protests"))





###################### Table A10 ######################

##### 60 percent threshold #####
##### Model 1 (bivariate)
int1_nat = glm(intensity60 ~ onsettype_new,
               data = datonset, 
               family = binomial(link = "logit"))

##### Model 2 (Limited controls)
int2_nat = glm(intensity60 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac, 
               data = datonset, 
               family = binomial(link = "logit"))

##### Model 3 (Additional identity and goals controls)
int3_nat = glm(intensity60 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                 + marxist + secessionist + islam, 
               data = datonset, family = binomial(link = "logit"))

##### Model 4 (Additional structural controls)
int4_nat = glm(intensity60 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                 milper + lnpop + natres, 
               data = datonset, family = binomial(link = "logit"))

##### Model 5 (Full controls)
int5_nat = glm(intensity60 ~ onsettype_new + lngdppc + post_cw + mountain2 + polity + ethfrac +
                 milper + lnpop + natres + marxist + secessionist + islam,
               data = datonset, family = binomial(link = "logit"))

## Get all the models in one table
stargazer(int1_nat, int2_nat, int3_nat, int4_nat, int5_nat, type = "text")

# Get pseudo R-squared for each model
pR2(int1_nat)["McFadden"]
pR2(int2_nat)["McFadden"]
pR2(int3_nat)["McFadden"]
pR2(int4_nat)["McFadden"]
pR2(int5_nat)["McFadden"]





###################### Figure A6 ######################

# Create new data frame with assigned values for all controls
newdataconseverity60_nat <- with(datonset, data.frame(onsettype_new = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                                                      lngdppc = mean(lngdppc, na.rm = TRUE),
                                                      post_cw = median(post_cw),
                                                      mountain2 = mean(mountain2, na.rm = TRUE),
                                                      polity = mean(polity, na.rm = TRUE),
                                                      ethfrac = mean(ethfrac, na.rm = TRUE),
                                                      milper = mean(milper, na.rm = TRUE),
                                                      lnpop = mean(lnpop, na.rm = TRUE),
                                                      natres = median(natres, na.rm = TRUE),
                                                      marxist = median(marxist),
                                                      secessionist = median(secessionist),
                                                      islam = median(islam)))

# Calculate predicted probabilities for each type of onset with standard errors
predprod_conseverity60_nat <- predict(int5_nat, newdataconseverity60_nat, type = "response", se.fit = TRUE)
predprod_conseverity60_nat$pred.prob <- predprod_conseverity60_nat$fit

# Convert list into dataframe
predprod_conseverity60_nat <- as.data.frame(predprod_conseverity60_nat)

# Calculate the bonds on the CIs for each pred. prob.
predprod_conseverity60_nat$lowerCI <- predprod_conseverity60_nat$fit - (1.96*predprod_conseverity60_nat$se.fit)
predprod_conseverity60_nat$upperCI <- predprod_conseverity60_nat$fit + (1.96*predprod_conseverity60_nat$se.fit)

# Add onset type variable
predprod_conseverity60_nat$onsettype_new <- c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests")

## Plot the predicted probabilities with their respective CIs
ggplot(predprod_conseverity60_nat, aes(x = onsettype_new, y = pred.prob)) +
  geom_pointrange(aes(ymin = lowerCI, ymax = upperCI, fill = onsettype_new), color = "darkslategrey", alpha = 0.8) +
  xlab("") +
  ylab("Predicted probabilities") +
  ylim(0,1.25) +
  scale_y_continuous(breaks = seq(0, 1.2, 0.2)) +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "black"),
        axis.text.y = element_text(colour = "black"),
        text = element_text(size=16)) +
  scale_x_discrete(limits = c("Peripheral challenge", "State disintegration", "Coup", "Insurrectionary protests"), 
                   labels = c("Peripheral challenge" = "Peripheral\nchallenge",
                              "State disintegration" = "State\n disintegration", 
                              "Coup" = "Coup",
                              "Insurrectionary protests" = "Insurrectionary\n protests"))





###################### Figure A8 ######################

# Note on insurrectionary protests: Get distribution of duration for this category
datonset_duration_is <- datonset %>%
  filter(onsettype == "Insurrectionary protests") %>%
  group_by(duration_years) %>%
  summarise(n = n())

# Plot
ggplot(datonset_duration_is, aes(x = duration_years, y = n)) +
  geom_histogram(aes(fill = duration_years), stat = "identity", fill = "slategray4") +
  theme_bw() +
  ylab("Number of observations") +
  xlab("Conflict duration (in years)") +
  theme(legend.position = "none", text = element_text(size=15)) +
  scale_x_continuous(n.breaks = 10) +
  geom_vline(data = datonset, aes(xintercept = mean(duration_years, na.rm = TRUE)),
             color = "red", linetype = "dashed")





###################### Figure A9 ######################

ggplot(datonset_duration_is) +
  aes(x = "", y = duration_years) +
  geom_boxplot(fill = "slategray4") +
  theme_minimal() +
  ylab("Distribution") +
  theme(legend.position = "none", text = element_text(size=15))





###################### Figure A10 ######################

#### Test for proportional hazards assumption
summary(onset_cox5) # summarizes model with all covariates
test.ph <- cox.zph(onset_cox5) # tests for the PH assumption
test.ph

# Plot the Schoenfeld residuals
ggcoxzph(test.ph) # a non-zero ("non-flat") slope is indicative of violating of the HP assumption.





###################### Figure A11 ######################

#### Testing for influential observation/outliers
ggcoxdiagnostics(onset_cox5, type = "dfbeta",
                 linear.predictions = FALSE, ggtheme = theme_bw())





###################### Table A11 ######################

# Drop that case of Northern Ireland from the analysis
datonset2 <- subset(datonset, conflict_id != 178)

# Store duration data
surv_years <- Surv(time = datonset2$duration_years, event = datonset2$ended)

##### Model 1 (bivariate) #####
cox_l5 <- coxph(surv_years ~ onsettype_new, data = datonset2)

##### Model 2 (Limited controls) #####
cox2_l5 <- coxph(surv_years ~ onsettype_new + lngdppc_lag5 + milper + post_cw + mountain2 + marxist + ethfrac, 
                 data = datonset2)

##### Model 3 (Additional identity and goals controls) #####
cox3_l5 <- coxph(surv_years ~ onsettype_new + lngdppc_lag5 + milper + post_cw + mountain2 + marxist + ethfrac + secessionist + islam, 
                 data = datonset2)

##### Model 4 (Additional structural controls) #####
cox4_l5 <- coxph(surv_years ~ onsettype_new + lngdppc_lag5 + milper + post_cw + mountain2 + polity + fl_oil + marxist + ethfrac, 
                 data = datonset2)

##### Model 5 (Full controls) #####
cox5_l5 <- coxph(surv_years ~ onsettype_new + lngdppc_lag5 + milper + post_cw + mountain2 + polity + fl_oil + marxist + ethfrac + secessionist + islam, 
                 data = datonset2)

### Summary of results
stargazer(cox_l5, cox2_l5, cox3_l5, cox4_l5, cox5_l5, type = "text")





###################### Table A12 ######################

# Drop that case of Northern Ireland from the analysis
datonset2 <- subset(datonset, conflict_id != 178)

# Store duration data
surv_years <- Surv(time = datonset2$duration_years, event = datonset2$ended)

##### Model 1 (bivariate) #####
cox_nat <- coxph(surv_years ~ onsettype_new, data = datonset2)

##### Model 2 (Limited controls) #####
cox2_nat <- coxph(surv_years ~ onsettype_new + lngdppc + milper + post_cw + mountain2 + marxist + ethfrac, 
                  data = datonset2)

##### Model 3 (Additional identity and goals controls) #####
cox3_nat <- coxph(surv_years ~ onsettype_new + lngdppc + milper + post_cw + mountain2 + marxist + ethfrac + secessionist + islam, 
                  data = datonset2)

##### Model 4 (Additional structural controls) #####
cox4_nat <- coxph(surv_years ~ onsettype_new + lngdppc + milper + post_cw + mountain2 + polity + natres + marxist + ethfrac, 
                  data = datonset2)

##### Model 5 (Full controls) #####
cox5_nat <- coxph(surv_years ~ onsettype_new + lngdppc + milper + post_cw + mountain2 + polity + natres + marxist + ethfrac + secessionist + islam, 
                  data = datonset2)

### Summary of results
stargazer(cox_nat, cox2_nat, cox3_nat, cox4_nat, cox5_nat, type = "text")


#######################################################
